# Compute, for MovieLens-100K, the number of item interactions per user and the number of user interactions per item.

import numpy as np
from tqdm import tqdm

# Hard-coded absolute path to the interaction file; adjust for the local machine.
path = "F:\\LCW\\PycharmProject\\pytorch-learn\\RecSys\\dataset\\movielens-100k\\ml-100k.txt"

# Read every record up front so tqdm can display a total; the file is closed
# as soon as the lines are in memory instead of staying open while counting.
with open(path, 'r', encoding='utf-8') as f:
    data = f.readlines()

# Count how many records each user appears in.
user_itemcount = dict()
for line in tqdm(data):
    if not line.strip():
        # Tolerate blank lines (e.g. a trailing newline at end of file),
        # which previously crashed the script with an IndexError.
        continue
    # Fields are tab-separated: user ID first, item ID second. Unpacking two
    # values makes a record with fewer than two fields fail loudly.
    user, item = line.split('\t')[:2]
    user_itemcount[user] = user_itemcount.get(user, 0) + 1

# Set of all distinct user IDs seen in the file.
user_list = set(user_itemcount)

# Sort by interaction count, highest first. NOTE: this rebinds user_itemcount
# from a dict to a list of (user, count) tuples.
user_itemcount = sorted(user_itemcount.items(), key=lambda x: x[1], reverse=True)
print(user_itemcount)
print(type(user_itemcount))


# Write one "<user>\t<count>" record per line, in the order held by
# user_itemcount (a sequence of (user, count) pairs).
# Mode 'w' rather than 'a': re-running the script must replace the previous
# output, not append a duplicate copy of every record to it.
with open('user2item.txt', 'w') as w:
    for uid, n_items in user_itemcount:
        w.write(uid + '\t' + str(n_items) + '\n')